*** Load the raw price CSV, derive calendar fields and the cusip / year-quarter merge keys, and save it as .dta
import delimited "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/CSV/20160726_price.csv", encoding(ISO-8859-1) clear

* Parse the text date (month-day-year order) into a Stata daily date, then split it into parts
generate newdate = date(date, "MDY")
foreach part in quarter month year day {
    generate `part' = `part'(newdate)
}

* Keep separate copies of the price date (numeric and text) under daily_* names
generate daily_date = newdate
generate daily_date_string = date

* Year-quarter key: year digits followed by the quarter digit
tostring year, gen(year_string)
tostring quarter, gen(quarter_string)
generate year_quarter_string = year_string + quarter_string

* Shorten cusip to its first 8 characters
generate cusip_short = substr(cusip, 1, 8)
drop cusip
rename cusip_short cusip

save "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/20160726_price.dta", replace

*** Load the raw earnings CSV, derive calendar fields and the cusip / year-quarter merge keys,
*** remove keys that are not unique, and save it as .dta
import delimited "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/CSV/20160726_earnings.csv", encoding(ISO-8859-1) clear

* Parse datadate (month-day-year order) into a Stata daily date, then split it into parts
gen newdate = date(datadate, "MDY")
gen quarter = quarter(newdate)
gen month = month(newdate)
gen year = year(newdate)
gen day = day(newdate)

* Year-quarter key: year digits followed by the quarter digit
tostring year, gen(year_string)
tostring quarter, gen(quarter_string)
gen year_quarter_string = year_string + quarter_string

* Shorten cusip to its first 8 characters
gen cusip_short = substr(cusip, 1, 8)
drop cusip
rename cusip_short cusip

* duplicates tag stores the number of EXTRA copies of each key (0 = unique, 1 = pair, 2 = triple, ...).
* Drop every observation whose key is repeated, not only the pairs: testing == 1 would leave
* keys that occur three or more times in the data, and the 1:m merge on these keys would then fail.
duplicates tag cusip year_quarter_string, gen(duplicates)
drop if duplicates > 0

save "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/20160726_earnings.dta", replace

*** Join the price file (using; many rows per key) onto the earnings data in memory (master; one row per key)
merge 1:m cusip year_quarter_string using "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/20160726_price.dta"

* Remove rows without a gvkey (presumably price rows that found no earnings record -- confirm),
* then inspect whatever did not match on both sides
drop if gvkey == .
browse if _merge != 3

save "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/20160726_price_earnings_merge.dta", replace

* Reload the merged file so the steps that follow start from the saved copy
use "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/20160726_price_earnings_merge.dta", clear

*** Earlier three-way split (pre-crisis, crisis, post-crisis), kept commented out for reference; the active definition of crisis follows below
/*gen crisis = . 
replace crisis = 0 if year >= 2002 & year <= 2006 
replace crisis = 1 if (year >= 2007 & year < 2010) | (year == 2010 & month == 1) 
replace crisis = 2 if (year >= 2010) | (year == 2010 & month > 1) 
replace crisis = 3 if year == 2015 & month == 12 & day == 31*/

*** Average P/E ratio per cusip in each period
* crisis: 0 = 2002-2007, 1 = 2010-2014, 2 = 2015, missing for every other year
generate crisis = .
replace crisis = 0 if inrange(year, 2002, 2007)
replace crisis = 1 if inrange(year, 2010, 2014)
replace crisis = 2 if year == 2015

* Flag the six banks of interest by permno
generate big6 = inlist(permno, 86868, 47896, 69032, 59408, 38703, 70519)

* Only rows present in both the earnings and the price data are usable
drop if _merge != 3

* NOTE(review): if prc is a CRSP price, negative values mark bid/ask midpoints -- confirm whether abs(prc) is wanted here
generate pe_ratio = prc/epsf12

* Period average per cusip, plus a within-group counter so one row per group can be picked out
bysort cusip crisis: egen pe_avg = mean(pe_ratio)
capture drop output_indicator
bysort cusip crisis: generate output_indicator = _n

browse cusip tic crisis pe_avg if big6 == 1 & output_indicator == 1

**** LHS asked for a plot of the PE ratio over time for each of the Big 6
* Counter within cusip-date: any value above 1 means several rows share the same cusip and day
bysort cusip daily_date: generate test = _n

* Split the price date into month, day and year
foreach part in month day year {
    generate daily_`part' = `part'(daily_date)
}

* Show daily_date as a calendar date instead of a raw day count
format %td daily_date



*** TODO: give each graph its title and name, then iterate through the banks. Remember to explain
*** the odd-looking earnings numbers and the methodology behind the PE ratio.
keep if big6 == 1

* Number the remaining cusips 1..N so they can be looped over
egen id = group(cusip)

* Loop bounds; id_min and id_max are kept as locals because later graphs reuse them
quietly summarize id
local id_min = r(min)
local id_max = r(max)

forvalues id = `id_min'/`id_max' {
    * Bring the current bank's rows to the top so its ticker can be read from row 1
    capture drop id_prime
    generate id_prime = (id == `id')
    gsort -id_prime
    local bankname = tic[1]

    * Put the data back in date order so the line is drawn chronologically
    gsort daily_date
    graph twoway line pe_ratio daily_date if id==`id', xtitle("") title("PE Ratio for `bankname' Over Time")
    graph export `bankname'_pe.png, replace
    graph close
}



/*capture drop id_prime
gen id_prime=1 if id==`id'
replace id_prime=0 if id!=`id'
gsort -id_prime*/

*** The next merge (with the Rajan index data) is keyed on a year-month string, so build it here
* Clear out any earlier versions of the helper variables; each drop is allowed to fail on its own
foreach stale in month_string daily_year daily_month year_string {
    capture drop `stale'
}

* Year and month of the price date
foreach part in year month {
    generate daily_`part' = `part'(daily_date)
}

* Key = year digits followed by the month number (no zero padding)
foreach part in year month {
    tostring daily_`part', gen(`part'_string)
}
generate year_month_string = year_string + month_string

* _merge has to go before another merge can create it again
drop _merge
save "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/20160726_price_earnings_merge.dta", replace


*** Basic Rajan measure: load the index CSV, compute the index P/E, and merge it onto the bank data by year-month
import delimited "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/CSV/20160726_index.csv", encoding(ISO-8859-1) clear

* Parse datadate (month-day-year order) into a Stata daily date, then split it into parts
gen newdate = date(datadate, "MDY")
gen quarter = quarter(newdate)
gen month = month(newdate)
gen year = year(newdate)
gen day = day(newdate)
format newdate %td

* Key = year digits followed by the month number (no zero padding), matching the bank file's key
tostring year, gen(year_string)
tostring month, gen(month_string)
gen year_month_string = year_string + month_string

* Index price-to-earnings ratio
gen pe_index = prccm/epsx12

save "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/20160726_index.dta", replace

* The index data in memory is the master and is expected to hold one row per year-month, while the
* bank file has many rows per month, so this is a 1:m merge. merge m:m pairs rows by their position
* within each key and silently produces arbitrary matches when the master key repeats; 1:m stops
* with an error instead if the index file turns out not to be unique on year_month_string.
merge 1:m year_month_string using "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/20160726_price_earnings_merge.dta"


**** Second set of graphs: each bank's PE ratio scaled by the index P/E
generate pe_ratio_div = pe_ratio/pe_index

* Relies on the locals id_min and id_max still being defined from earlier in this do-file run
forvalues id = `id_min'/`id_max' {
    * Bring the current bank's rows to the top so its ticker can be read from row 1
    capture drop id_prime
    generate id_prime = (id == `id')
    gsort -id_prime
    local bankname = ticker[1]

    * Put the data back in date order so the line is drawn chronologically
    gsort daily_date
    graph twoway line pe_ratio_div daily_date if id==`id', xtitle("") title("PE Ratio for `bankname' Over Time, In Percent of S&P 500 P/E Ratios", size(small))
    graph export `bankname'_pe_div.png, replace
    graph close
}

save "/Users/natashasarin/Dropbox/Z__Natamir/Natasha_Research/Banks/Price_Earnings/Data/20160726_index_merge.dta", replace

*** Period averages of the index-scaled PE ratio, one row per cusip and period
* Rebuild the within-group counter after the merge (this drop fails if the variable is absent)
drop output_indicator
bysort cusip crisis: egen pe_ratio_avg = mean(pe_ratio_div)
bysort cusip crisis: generate output_indicator = _n

browse cusip tic comnam crisis pe_ratio_avg if big6 == 1 & output_indicator == 1
